from vosk import Model, KaldiRecognizer
import wave
import json

# Load the Vosk model once at import time; recognize_speech() below reuses this
# single instance for every call. The path is relative, so it is resolved
# against the process's current working directory.
model = Model(r"vosk-model-en-us-0.22")

def recognize_speech(audio_path):
    """Transcribe a WAV file using the module-level Vosk ``model``.

    The audio is streamed to the recognizer in chunks. Each time the
    recognizer finalizes a segment its text is collected, and the text
    still buffered at end-of-file is appended, so the returned transcript
    covers the whole recording rather than only the last segment.

    Args:
        audio_path: Path to a WAV file readable by ``wave.open``.
            NOTE(review): Vosk's own examples feed mono 16-bit PCM; this
            function does not validate the format -- confirm with callers.

    Returns:
        The recognized text of all segments joined by single spaces, or
        an empty string if nothing was recognized.

    Raises:
        wave.Error: If the file is not a valid WAV file.
        OSError: If the file cannot be opened.
    """
    segments = []

    # The context manager closes the file even if recognition raises.
    with wave.open(audio_path, "rb") as wf:
        recognizer = KaldiRecognizer(model, wf.getframerate())
        # Word-level output stays enabled as in the original code; only the
        # "text" field of each result is consumed below.
        recognizer.SetWords(True)

        while True:
            data = wf.readframes(4000)
            if not data:
                break
            # AcceptWaveform() returns True when a segment has been
            # finalized; Result() must be read at that point or that
            # segment's text is lost.
            if recognizer.AcceptWaveform(data):
                segment = json.loads(recognizer.Result())
                segments.append(segment.get('text', ''))

        # Flush whatever audio is still buffered after the last chunk.
        tail = json.loads(recognizer.FinalResult())
        segments.append(tail.get('text', ''))

    # Skip empty segments (e.g. silence) to avoid doubled spaces.
    return " ".join(text for text in segments if text)
